library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
# hosp_activity_page <- read_csv(here("data/hospital_activity_page.csv"))
# hosp_activity_by_speciality <-
# read_csv(here("data/Hospital Activity by Speciality.csv"))
hosp_activity_and_demographics <-
read_csv(here("data/Hospital Activity and Patient Demographics.csv"))
## Rows: 129599 Columns: 19
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (7): _id, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Len...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_and_deprivation <-
read_csv(here("data/Hospital Activity and Deprivation.csv"))
## Rows: 40894 Columns: 19
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (8): _id, SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stay...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#
# aAndE_waiting_times <-
# read_excel(here("data/monthly_ae_waitingtimes_A&E attendances and performance data_202201.xlsx"))
covid_ads_HB_deprivation <-
read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board and Deprivation_20220302.xlsx"))
# covid_ads_HB_and_speciality <-
# read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board and Specialty_20220302.xlsx"))
covid_ads_HB_age_sex <-
read_excel(here("data/Hospitalisations due to Covid 19/Admissions By Health Board, Age and Sex_20220302.xlsx"))
covid_ads_HSCP_deprivation <-
read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP and Deprivation_20220302.xlsx"))
# covid_ads_HSCP_speciality <-
# read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP and Specialty_20220302.xlsx"))
covid_ads_HSCP_age_sex <-
read_excel(here("data/Hospitalisations due to Covid 19/Admissions By HSCP, Age and Sex_20220302.xlsx"))
Clean names
hosp_activity_and_demographics <-
janitor::clean_names(hosp_activity_and_demographics)
hosp_activity_and_deprivation <-
janitor::clean_names(hosp_activity_and_deprivation)
covid_ads_HB_deprivation <-
janitor::clean_names(covid_ads_HB_deprivation)
covid_ads_HB_age_sex <-
janitor::clean_names(covid_ads_HB_age_sex)
covid_ads_HSCP_deprivation <-
janitor::clean_names(covid_ads_HSCP_deprivation)
covid_ads_HSCP_age_sex <-
janitor::clean_names(covid_ads_HSCP_age_sex)
Do they match?
names(hosp_activity_and_demographics)
## [1] "id" "quarter"
## [3] "quarter_qf" "hb"
## [5] "hbqf" "location"
## [7] "location_qf" "admission_type"
## [9] "admission_type_qf" "sex"
## [11] "age" "episodes"
## [13] "length_of_episode" "average_length_of_episode"
## [15] "average_length_of_episode_qf" "stays"
## [17] "length_of_stay" "average_length_of_stay"
## [19] "average_length_of_stay_qf"
names(hosp_activity_and_deprivation)
## [1] "id" "quarter"
## [3] "quarter_qf" "hb"
## [5] "hbqf" "location"
## [7] "location_qf" "admission_type"
## [9] "admission_type_qf" "simd"
## [11] "simdqf" "episodes"
## [13] "length_of_episode" "average_length_of_episode"
## [15] "average_length_of_episode_qf" "stays"
## [17] "length_of_stay" "average_length_of_stay"
## [19] "average_length_of_stay_qf"
names(covid_ads_HB_deprivation)
## [1] "week_ending" "hb" "hbqf"
## [4] "simd_quintile" "admission_type" "admission_type_qf"
## [7] "number_admissions" "average20182019" "percent_variation"
names(covid_ads_HB_age_sex)
## [1] "week_ending" "hb" "hbqf"
## [4] "age_group" "age_group_qf" "sex"
## [7] "sex_qf" "admission_type" "admission_type_qf"
## [10] "number_admissions" "average20182019" "percent_variation"
names(covid_ads_HSCP_deprivation)
## [1] "week_ending" "hscp" "simd_quintile"
## [4] "admission_type" "admission_type_qf" "number_admissions"
## [7] "average20182019" "percent_variation"
names(covid_ads_HSCP_age_sex)
## [1] "week_ending" "hscp" "age_group"
## [4] "age_group_qf" "sex" "sex_qf"
## [7] "admission_type" "admission_type_qf" "number_admissions"
## [10] "average20182019" "percent_variation"
hosp_activity_and_demographics %>%
distinct(age)
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values
hosp_activity_and_demographics %>%
mutate(age = is.na(age)) %>%
filter(age == TRUE)
# 7 General hosp admission types for both demographics and deprivation:
# Elective Inpatients
# Emergency Inpatients, (THIS IS THE ONLY DEFINITELY ACUTE ONE)
# Transfers
# All Day cases
# All Inpatients
# All inpatients and Day Cases
# Not Specified
# No NAs.
hosp_activity_and_demographics %>%
distinct(admission_type)
hosp_activity_and_deprivation %>%
distinct(admission_type)
# 3 COVID ADMISSION TYPES for HB and HSCP - deprivation and age/sex:
# All - Emergency - Planned
covid_ads_HB_deprivation %>%
distinct(admission_type)
covid_ads_HB_age_sex %>%
distinct(admission_type)
covid_ads_HSCP_deprivation %>%
distinct(admission_type)
covid_ads_HSCP_age_sex %>%
distinct(admission_type)
# tried them all for NA and there are none
# hosp_activity_and_demographics %>%
# mutate(admission_type = is.na(admission_type)) %>%
# filter(admission_type == TRUE)
# 2 sex = Female, Male
hosp_activity_and_demographics %>%
distinct(sex)
hosp_activity_and_demographics %>%
mutate(sex = is.na(sex)) %>%
filter(sex == TRUE)
# No NAs
Sex against time and admissions
Filter for acute services:
# might want to make use of this if we're not sure about admission types
# acute_target <- c("Emergency Inpatients", "Not Specified")
# Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All
# Inpatients and Day cases, Not Specified
general_admissions_acute_age_sex <- hosp_activity_and_demographics %>%
filter(admission_type == "Emergency Inpatients")
general_admissions_acute_age_sex
Now group by and create graphs over time:
gen_admissions_sex_per_quarter <- general_admissions_acute_age_sex %>%
group_by(quarter, sex) %>%
summarise(total_admissions_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
gen_admissions_sex_per_quarter %>%
ggplot()+
aes(x = quarter,
y = total_admissions_per_quarter,
group = sex, colour = sex)+
geom_line() +
labs(x = "Quarter",
y = "Total Admissions",
title = "Total Emergency Inpatient Admissions",
subtitle = "November 2019 to February 2022",
colour = "Sex") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
hosp_activity_and_demographics %>%
distinct(age) # 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values
hosp_activity_and_demographics %>%
mutate(age = is.na(age)) %>%
filter(age == TRUE) # no NAs
gen_admissions_age_per_quarter <- general_admissions_acute_age_sex %>%
# remember this is only looking at Emergency Inpatients but there are 7 groups
group_by(quarter, age) %>%
summarise(total_admissions_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
# Note that it's 0-9years that are the low values and I'll make this better soon
gen_admissions_age_per_quarter %>%
ggplot()+
aes(x = quarter,
y = total_admissions_per_quarter,
group = age, colour = age)+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Admissions",
title = "Total Emergency Inpatient Admissions",
subtitle = "November 2019 to February 2022",
colour = "Age Group") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
gen_admissions_age_per_quarter %>%
ggplot()+
aes(x = quarter,
y = total_admissions_per_quarter,
group = age)+
geom_col() +
facet_wrap(~age)+
labs(x = "Yearly Quarter",
y = "Total Admissions",
title = "JUST MADE THIS TO SEE THE DIFFERENCE - AWARE IT'S MESSY -
Total Emergency Inpatient Admissions",
subtitle = "November 2019 to February 2022") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
# We have the same admission types here as above so I'll do the same - i.e. only
# include the Emergency Inpatients though there are 7 groups
hosp_activity_and_deprivation %>%
distinct(admission_type)
general_admissions_acute_deprivation <- hosp_activity_and_deprivation %>%
filter(admission_type == "Emergency Inpatients")
general_admissions_acute_deprivation
SIMD Types * note there are 962 NAs: there are codes in the simdqf column as to why * sometimes it’s just a general geographical group
general_admissions_acute_deprivation %>%
group_by(simd) %>%
summarise(total_for_each_simd = n())
gen_admissions_simd_per_quarter <- general_admissions_acute_deprivation %>%
group_by(quarter, simd) %>%
drop_na(simd) %>%
summarise(total_admission_per_quarter = n())
## `summarise()` has grouped output by 'quarter'. You can override using the
## `.groups` argument.
gen_admissions_simd_per_quarter
gen_admissions_simd_per_quarter %>%
ggplot() +
aes(x = quarter,
y = total_admission_per_quarter,
fill = simd) +
geom_col(position = "dodge") + #why won' it dodge? Not enough space??
labs(x = "Yearly Quarter",
y = "Total Admissions",
title = "Total Emergency Inpatient Admissions by Deprivation Level")+#,
#colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
gen_admissions_simd_per_quarter %>%
ggplot() +
aes(x = quarter,
y = total_admission_per_quarter,
fill = simd) +
geom_col(position = "dodge") +
facet_wrap(~simd)+
labs(x = "Yearly Quarter",
y = "Total Admissions",
title = "Total Emergency Inpatient Admissions by Deprivation Level",
colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
# hmmm, that's not a very nice line graph:
gen_admissions_simd_per_quarter %>%
ggplot() +
aes(x = quarter,
y = total_admission_per_quarter,
group = simd,
colour = simd) +
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Admissions",
title = "Total Emergency Inpatient Admissions by Deprivation Level")+#,
#colour = "SIMD Level: (1 = Most Deprived : 5 = Least Deprived")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 0.9))
## COVID ADMISSIONS
NOTE sex: “All” = 33951 rows, Female = 4782, Male = 4783
# by healthboard
# admission types are either emergncy or planned - that's good news for us
covid_ads_HB_age_sex %>%
distinct(admission_type)
covid_acute_HB_age_sex <- covid_ads_HB_age_sex %>%
filter(admission_type == "Emergency")
# this is not neatly divided and there is an age group "41760" hmm
covid_acute_HB_age_sex %>%
group_by(age_group) %>%
summarise(count_in_age_group = n())
# nope
covid_acute_HB_age_sex %>%
group_by(age_group) %>%
ggplot()+
aes(x = number_admissions,
y = average20182019,
fill = age_group)+
geom_point()
covid_ads_HB_age_sex %>%
group_by(sex) %>%
summarise(total_of_each_sex = n())
covid_ads_HB_demographics_sex <- covid_ads_HB_age_sex %>%
group_by(week_ending, sex) %>%
summarise(total_admissions_for_week_ending = n())
## `summarise()` has grouped output by 'week_ending'. You can override using the
## `.groups` argument.
covid_ads_HB_demographics_sex
covid_ads_HB_demographics_sex %>%
ggplot()+
aes(x = week_ending,
y = total_admissions_for_week_ending,
group = sex,
colour = sex)+
geom_line()+
labs(title = "I AM AWARE THIS IS AWFUL")